/*
* Programs required:
ssc install _gwtmean
ssc install rangestat
ssc install winsor2
ssc install ranktest
cap ado uninstall ftools
net install ftools, from("https://raw.githubusercontent.com/sergiocorreia/ftools/master/src/")
cap ado uninstall reghdfe
net install reghdfe, from("https://raw.githubusercontent.com/sergiocorreia/reghdfe/master/src/")
cap ado uninstall ivreghdfe
cap ssc install ivreg2 // Install ivreg2, the core package
net install ivreghdfe, from(https://raw.githubusercontent.com/sergiocorreia/ivreghdfe/master/src/)
*/

/*
Order of running files for the state-group panel analysis
(1 or 2) Run ConstructingMW-bian.do
(1 or 2) MORG_cleaner.do
(3) This file
*/

/*
Inputs required
(1) GDPDEF_qtr.csv from FRED
(2) wage`year'.dta constructed in MORG_cleaner.do
(3) cps-to-census.dta concordance between state codes
(4) state_yb_combo.dta constructed in ConstructingMW-bian.do
*/

clear all
pause off 
set matsize 5000
set maxvar 20000

graph set window fontface "Georgia"

cd "/Users/jvogel/Dropbox/JvogelPrivateWork/Cannonical_Model_MW/Replication/Data"

************************
* Choose globals related to time changes
************************

global diff=7 // periods of difference in the DV [6 - 8, using 7 with 6 and 8 as robustness]
global diff_fixed=${diff}
global maxlag=${diff}+6  // number of lagged shocks (including current) [${diff} +6]
global maxlead=${diff} // number of lead shocks [${diff} report first 4]

************************
*CREATING biannual GDP deflator from FRED data
************************
clear
import delimited Input/GDPDEF_qtr.csv // From FRED
gen year=real(substr(date,1,4))
gen month=real(substr(date,6,2))
gen bian=.
assert month~=.
replace bian=1 if month<=6  & bian==.
replace bian=2 if month<=12 & bian==.
* getting max across quarters
bys year bian: egen gdp=max(gdpdef)

replace gdpdef=gdp
keep gdpdef year bian
duplicates drop
save Output/gdpDeflatorFRED_bian.dta, replace

/************************************************
************************************************
Bring in MORG data cleaned in MORG_cleaner.do
Collapse relevant variables to half-years (yb) x labor bins (lbin)
************************************************
************************************************/

use Output/MORGcleaned/wage79, clear
  keep year state weight edu age race male wage hours use intmonth
forval y=80(1)113 {
  display "year is `y'"
  append using Output/MORGcleaned/wage`y'
  keep year state weight edu age race male wage hours use intmonth
}
append using Output/MORGcleaned/wage114
  keep year state weight edu age race male wage hours stfips use intmonth
forval y=115(1)118 {
  display "year is `y'"
  append using Output/MORGcleaned/wage`y'
  keep year state weight edu age race male wage hours stfips use intmonth
}
* drop value labels
label drop _all

rename stfips Statefips
drop if state==53 | Statefips==11 // dropping DC
*getting all at statefip
merge m:1 state using Input/cps-to-census.dta
rename state_cen statefips
assert statefips==Statefips if Statefips~=. & statefip~=.
replace statefips=Statefips if statefip==.
assert statefips~=.
drop Statefips abbrev statename state _merge
drop if statefips==11

**** Create biannual variable (bian) and then year-bian variable (yb)
gen bian=.
assert intmonth~=.
replace bian=1 if intmonth<=6  & bian==.
replace bian=2 if intmonth<=12 & bian==.
assert bian~=.
sort year bian
egen yb=group(year bian)

****** DROP IF YEAR = 1994 (no allocation flags) YEAR = 1995 before Sept. (allocation flags fraction of the year)
drop if year==1994 | (year==1995 & intmonth<9)
* note that only have 4 months in second half of 1995

************************
* Labor bins
************************

* college indicator
gen col=(edu==4 | edu==5)

gen white=(race==1)
assert white==0 | white==1
assert male==0 | male==1
assert edu>=1 & edu<=5

gen agebin=.
replace agebin=1 if age<26 & agebin==.	 // 10 years
replace agebin=2 if age<36 & agebin==.	 // 10 years
replace agebin=3 if age<46 & agebin==.   // 10 years
replace agebin=4 if age<56 & agebin==.	 // 10 years
replace agebin=5 if age>=56 & agebin==. // 9 years
assert agebin~=.

tostring agebin male white edu, replace
gen lbin= agebin+male+white+edu
label variable lbin "agebin (5) male (2) white (2) edu (5)"

************************
* Sample selection
************************

keep if use==1 // keep only those whose wage data is clean
keep if wage>0 & hours>0

************************
*Getting data in final form
************************

replace hours=hours*weight  // hours and income only from those in wage sample
gen inc=hours*wage   // hours and income only from those in wage sample

*collapse
collapse (sum) weight hours inc, by(lbin year bian yb col statefip) // Constructing the log of the average wage, not average of log wage

* From nominal to real (in 2012 dollars)
merge m:1 year bian using Output/gdpDeflatorFRED_bian.dta  // Constructed above in this do file
assert _merge~=1
drop if _merge==2
drop _merge
replace inc=100*inc/gdp
drop gdp

* Fill in missing lbin x yb x statefip observations
preserve
  keep year bian yb
  duplicates drop
  save /tmp/tmp, replace
restore

drop year bian
reshape wide hours inc weight, i(statefips lbin) j(yb)
reshape long hours inc weight, i(statefips lbin) j(yb)
foreach var in hours inc {
  replace `var' = 0 if `var'==.
}
merge m:1 yb using /tmp/tmp
assert _merge==3
drop _merge

* lbin-specific wage
gen wage=inc/hours
gen lnwage=ln(wage)

**** YEAR CHOICE
keep if year<=2016

* Saving data
save Output/MORG_CMcomplete_bian.dta, replace


************************************************
************************************************
* Get state-yb-specific share of earnings <= cut*mw (for cut = 100, 105, 110)
************************************************
************************************************

* This is related to the approach in Bailey, DiNardo, and Stuart (2020) and Derenoncourt and Montialoux (2020)
* But they use the share of workers rather than income

use Output/MORGcleaned/wage79, clear
  keep year state weight edu age race male wage hours use intmonth
forval y=80(1)113 {
  display "year is `y'"
  append using Output/MORGcleaned/wage`y'
  keep year state weight edu age race male wage hours use intmonth
}
append using Output/MORGcleaned/wage114
  keep year state weight edu age race male wage hours stfips use intmonth
forval y=115(1)118 {
  display "year is `y'"
  append using Output/MORGcleaned/wage`y'
  keep year state weight edu age race male wage hours stfips use intmonth
}
* drop value labels
label drop _all

rename stfips Statefips
drop if state==53 | Statefips==11 // dropping DC
*getting all at statefip
merge m:1 state using Input/cps-to-census.dta
rename state_cen statefips
assert statefips==Statefips if Statefips~=. & statefip~=.
replace statefips=Statefips if statefip==.
assert statefips~=.
drop Statefips abbrev statename state _merge
drop if statefips==11

**** Create quarters and then year-quarter variable (yb)
gen bian=.
assert intmonth~=.
replace bian=1 if intmonth<=6 & bian==.
replace bian=2 if intmonth<=12 & bian==.
assert bian~=.
sort year bian
egen yb=group (year bian)
assert yb==(year-1979)*2+bian

**** Sample selection
keep if use==1 // keep only those whose wage data is clean
keep if wage>0 & hours>0

****** DROP IF YEAR = 1994 (no allocation flags) YEAR = 1995 before Sept. (allocation flags fraction of the year)
drop if year==1994 | (year==1995 & intmonth<9)

* create labor bins
gen white=(race==1)
assert white==0 | white==1
assert male==0 | male==1
assert edu>=1 & edu<=5

gen agebin=.
replace agebin=1 if age<26 & agebin==.	 // 10 years
replace agebin=2 if age<36 & agebin==.	 // 10 years
replace agebin=3 if age<46 & agebin==.     // 10 years
replace agebin=4 if age<56 & agebin==.	 // 10 years
replace agebin=5 if age>=56 & agebin==.    // 9 years
assert agebin~=.

tostring agebin male white edu, replace
gen lbin= agebin+male+white+edu
label variable lbin "agebin (5) male (2) white (2) edu (5)"

* merging with minimum wage data (nominal)

merge m:1 statefip year bian using Output/state_yb_combo.dta // Constructed in ConstructingMW-bian
assert (year<1979 | year>2016 | year==1994 | (year==1995 & bian==1) | statefip==11)  if _merge~=3
drop if _merge~=3
drop _merge

bys state year bian lbin: egen Inc_sly=sum(wage*weight*hours)
**** Loop over three values of cut used in the analysis: 100, 105, 110
forval c=100(5)110{
  preserve
  gen I`c'=(wage<=mw*`c'/100)
  assert I`c' ~=.
  bys state year bian lbin: egen Inc_slby`c'=sum(wage*weight*hours*I`c')
  gen share`c'=Inc_slby`c'/Inc_sly

  keep statefip year bian lbin share*
  duplicates drop

  save Output/MORG`c'_shares_bian.dta, replace
  restore
}


************************************************
************************************************
* Real minimum-wage average one period change
*   averaged across all states and all one-period changes btw 1979-2016, including all 1994 and 1995
************************************************
************************************************

use Output/state_yb_combo.dta, clear  // Constructed in ConstructingMW-bian
merge m:1 year bian using Output/gdpDeflatorFRED_bian.dta // Constructed in this do file above
assert (year<1979 | year>2016)  if _merge~=3
drop if _merge~=3 | year<1979 // I will use minimum wage changes over the sample period (not earlier), but also including 1994 and all of 1995
drop _merge
replace mw=100*mw/gdp // create real minimum wage
drop gdp
replace mw=log(mw)

* create the yb variable (note that I include 1994 and first half of 1995)
sort year bian
egen yb=group(year bian)
assert yb==(year-1979)*2+bian

* tsset the data for changes
tsset state yb

* construct the state-and-time changes
gen dmw=mw-L1.mw
* average them across all states and times
egen Dmw=mean(dmw)

keep D
gen c=1
duplicates drop

save Output/Dmw_avg_bian.dta, replace


************************************************
************************************************
* Results at baseline difference length
************************************************
************************************************

****************************************
*** Put together all data and prepare for estimation
****************************************

*** Put data together

* Bringing in wage data by labor bin
use Output/MORG_CMcomplete_bian.dta, clear  // Constructed in this do file above

* Bringing in state-bian real minimum wages
merge m:1 statefip year bian using Output/state_yb_combo.dta // Constructed in ConstructingMW-bian.do
assert (year<1979 | year>2016 | year==1994 | (year==1995 & bian==1) | statefip==11)  if _merge~=3
drop if _merge~=3
drop _merge

* Bring in the GDP deflator to create log(real mw)
merge m:1 year bian using Output/gdpDeflatorFRED_bian.dta // Constructed in this do file above
assert (year<1979 | year>2016 | year==1994 | (year==1995 & bian==1))  if _merge~=3
drop if _merge~=3
drop _merge
replace mw=100*mw/gdp // create real minimum wage
drop gdp
replace mw=ln(mw)
label variable mw "real state-yb mw (gdp deflator)"

* Bring in the bite measures
merge m:1 statefip year bian lbin using Output/MORG105_shares_bian.dta // Constructed in this do file above
assert (wage==. | lnwage==. | weight==0 | weight==.) if _merge~=3
drop _merge

* Bring in the avg one-period change in the real mw
gen c=1
merge m:1 c using Output/Dmw_avg_bian.dta // Constructed in this do file above
assert _merge==3  
drop _merge

* Create leave-out version of median share bound by mw: requires installing rangestat: ssc install rangestat
egen group_sl=group(state lbin)
egen low=min(yb)
egen high=max(yb)
rangestat (median) bound = share105, by(group_sl) excludeself interval(yb, low, high)
label variable bound "Leave-out median share105"

* Create the NON-leave-out version of the median share bound by mw (for one robustness?)
bys group_sl: egen Bound=median(share105)
label variable Bound "median share105 without leave out"

* Create indicator for balanced sample
gen count=(wage~=. & hours~=0 & inc~=0)
bys lbin statefip: egen num=sum(count)
sum num, det
gen keep=(num==r(max)) // indicator if state x group present in all periods

* indicators for characteristics
gen age=real(substr(lbin,1,1))
gen male=real(substr(lbin,2,1))
gen white=real(substr(lbin,3,1))
gen edu=real(substr(lbin,4,1))
egen LBIN=group(lbin)

* winsorize lnwage separately by (state x lbin)
gen lnwage_w=lnwage
winsor2 lnwage_w, replace cuts(2 98) by(statefips lbin)

*** Prepare for regression analysis

* tsset
egen state_lbin=group(statefips lbin)
drop count
tsset state_lbin yb
* yb takes into account the jump btw 1993q4 and 1995q4 (has jumps)

local maxlag=${maxlag}
local maxlag1=`maxlag'-1
gen dw_s = lnwage_w-L${diff}.lnwage_w
gen Dweight=(weight*L${diff}.weight)/(weight+L${diff}.weight)  // variable weight is the weighted number of workers

forval Lag=0(1)`maxlag1' {
  local Lag1=`Lag'+1
  gen dmw_`Lag'=L`Lag'.mw-L`Lag1'.mw
  gen dz_Inter_`Lag'=(dmw_`Lag'-Dmw)*L`Lag1'.bound
  gen d_Inter_`Lag'=(dmw_`Lag'-Dmw)*L`Lag1'.share${cut}

}
forval Lead=1(1)$maxlead {
  local Lead1=`Lead'-1
  gen dFmw_`Lead'=F`Lead'.mw-F`Lead1'.mw
  gen dz_FInter_`Lead'=(dFmw_`Lead'-Dmw)*L`Lead1'.bound
  gen d_FInter_`Lead'=(dFmw_`Lead'-Dmw)*L`Lead1'.share${cut}
}

****************************************
*** Baseline 2SLS
****************************************

preserve

ivreghdfe dw_s (d_Inter_* d_FInter_* = dz_Inter* dz_FInter* ) if keep==1 [aw=Dweight], absorb(i.yb i.yb#c.bound) cluster(statefip)
local maxlag1=$diff - 2 // Must include up to & including T-1, but no higher. This is achieved by averaging the T-2 (when z = `maxlag1') and T-1 lag weight (see below where `k'=`z'+1)

gen bL_0=_b[d_Inter_0]
gen seL_0=_se[d_Inter_0]
forval z=0(1)`maxlag1' {
  local k=`z'+1
  lincom (d_Inter_`z'+d_Inter_`k')/2
  gen bL_`k'=r(estimate)
  gen seL_`k'=r(se)
}
forval z=1(1)4 {
  local k=`z'+1
  lincom (-d_FInter_`z'-d_FInter_`k')/2  // converting changes to `levels'-like results for leads
  gen bF_`z'=r(estimate)
  gen seF_`z'=r(se)
}
keep bL* bF* se*
duplicates drop

* reshaping
gen c=1
reshape long bL_ bF_ seL_ seF_  , i(c) j(t)
* put lag and lead together
gen T=-t
save /tmp/tmp.dta, replace
keep t bL seL
append using /tmp/tmp.dta
drop if c==1 & bF==.
replace t=T if T~=.
replace bL = bF if bF~=.
replace seL = seF if seF~=.
drop c bF seF T
rename se* se
rename b* b
gen ub=b+1.645*se
gen lb=b-1.645*se

replace t=t/2

* save for comparison to other outputs
save "../Figures/temp/cut_comparison.dta", replace

twoway (rcap ub lb t, lwidth(thick) lc(dknavy %100)) ///
  (scatter b t, ms(O) msize(vlarge) mfc(red) mlc(dknavy)), ///
  ytitle("Magnification elasticities", size(large)) xtitle("Event time (in years)",size(large) margin(small)) xsize(8)  ///
  ylab(, labsize(large) nogrid) xlab(-2(.5)3, labsize(large)) graphregion(color(white)) yline(0, lcolor(black)) ///
  legend(off) 
  graph export "../Figures/fig_main.pdf", as(pdf) replace

restore

****************************************
*** Baseline Reduced Form
****************************************

preserve

reghdfe dw_s dz_Inter* dz_FInter* if keep==1 [aw=Dweight], absorb(i.yb i.yb#c.bound) cluster(statefip)
local maxlag1=$diff - 2
gen bL_0=_b[dz_Inter_0]
gen seL_0=_se[dz_Inter_0]
forval z=0(1)`maxlag1' {
  local k=`z'+1
  lincom (dz_Inter_`z'+dz_Inter_`k')/2
  gen bL_`k'=r(estimate)
  gen seL_`k'=r(se)
}
forval z=1(1)4 {
  local k=`z'+1
  lincom (-dz_FInter_`z'-dz_FInter_`k')/2  // converting changes to levels for leads
  gen bF_`z'=r(estimate)
  gen seF_`z'=r(se)
}
keep bL* bF* se*
duplicates drop

* reshaping
gen c=1
reshape long bL_ bF_ seL_ seF_  , i(c) j(t)
* put lag and lead together
gen T=-t
save /tmp/tmp.dta, replace
keep t bL seL
append using /tmp/tmp.dta
drop if c==1 & bF==.
replace t=T if T~=.
replace bL = bF if bF~=.
replace seL = seF if seF~=.
drop c bF seF T
rename se* se
rename b* b
gen ub=b+1.645*se
gen lb=b-1.645*se

replace t=t/2

twoway (rcap ub lb t, lwidth(thick) lc(dknavy %100)) ///
  (scatter b t, ms(O) msize(vlarge) mfc(red) mlc(dknavy)), ///
  ytitle("Lag weights", size(large)) xtitle("Event time (in years)",size(large) margin(small)) xsize(8)  ///
  ylab(, labsize(large) nogrid) xlab(-2(.5)3, labsize(large)) graphregion(color(white)) yline(0, lcolor(black)) ///
  legend(off) 
  graph export "../Figures/fig_mainRF.pdf", as(pdf) replace

restore

****************************************
*** Figure(s) for heterogeneity analysis
****************************************

preserve

forval j=1(1)9 {
  if `j'==1 {
    local samp "keep==1 & edu<=3" // education: non-college
  }
  if `j'==2 {
    local samp "keep==1 & male==0"  // gender: female
  }
  if `j'==3 {
    local samp "keep==1 & male==1"  // gender: male
  }
  if `j'==4 {
    local samp "keep==1 & age<3"  // age: young
  }
  if `j'==5 {
    local samp "keep==1 & white==1"  // race: white
  }
  if `j'==6 {
    local samp "keep==1 & white==0"  // race: non-white
  }
  if `j'==7 {
    local samp "keep==1 & age>=3"  // age: old
  }
  if `j'==8 {
    local samp "keep==1 & edu>3"  // education: college
  }
  if `j'==9 {
    assert col==0 | col==1
    local samp "(col==0 | col==1)"  // unbalanced panel (not included in paper but good to know)
  }

  ivreghdfe dw_s (d_Inter_* d_FInter_* = dz_Inter* dz_FInter* ) if `samp' [aw=Dweight], absorb(i.yb i.yb#c.bound) cluster(statefip)
  local maxlag1=$diff - 2
  gen bL`j'_0=_b[d_Inter_0]
  gen seL`j'_0=_se[d_Inter_0]
  forval z=0(1)`maxlag1' {
    local k=`z'+1
    lincom (d_Inter_`z'+d_Inter_`k')/2
    gen bL`j'_`k'=r(estimate)
    gen seL`j'_`k'=r(se)
  }
  forval z=1(1)4 {
    local k=`z'+1
    lincom (-d_FInter_`z'-d_FInter_`k')/2  // converting changes to levels for leads
    gen bF`j'_`z'=r(estimate)
    gen seF`j'_`z'=r(se)
  }
}
keep bL* bF* se*
duplicates drop
* reshaping to have all eight series in together
gen c=1
forval j=1(1)9 {
  gen bF`j'_0=.
  gen seF`j'_0=.
}
reshape long bL1_ bF1_ seL1_ seF1_  bL2_ bF2_ seL2_ seF2_  bL3_ bF3_ seL3_ seF3_  bL4_ bF4_ seL4_ seF4_  bL5_ bF5_ seL5_ seF5_  bL6_ bF6_ seL6_ seF6_  bL7_ bF7_ seL7_ seF7_  bL8_ bF8_ seL8_ seF8_ bL9_ bF9_ seL9_ seF9_ , i(c) j(t)

gen T=-t
save /tmp/tmp.dta, replace
keep t bL* seL*
append using /tmp/tmp.dta
drop if c==1 & bF1_==.
replace t=T if T~=.
forval j=1(1)9 {
  replace bL`j'_ = bF`j'_ if bF`j'_~=.
  replace seL`j'_ = seF`j'_ if seF`j'_~=.
}
drop c bF* seF* T
replace t=t/2
forval j=1(1)9 {
  rename bL`j'_ b`j'
  rename seL`j'_ se`j'
}

reshape long b se, i(t) j(j)
gen ub=b+1.645*se
gen lb=b-1.645*se

* Restrict to non-college, female, young, and white
replace t=t+1/12 if j==2 // move dates to be equally spaced
replace t=t+2/12 if j==4 // move dates to be equally spaced
replace t=t+3/12 if j==5 // move dates to be equally spaced
twoway  (rcap ub lb t if j==1, lwidth(thick) lc(dknavy %100)) ///
  (scatter b t if j==1, ms(O) msize(large) mfc(dknavy) mlc(dknavy)) ///
  (rcap ub lb t if j==2, lwidth(thick) lc(maroon %100)) ///
  (scatter b t if j==2, ms(T) msize(large) mfc(maroon) mlc(maroon)) ///
  (rcap ub lb t if j==4, lwidth(thick) lc(dkorange %100)) ///
  (scatter b t if j==4, ms(D) msize(large) mfc(dkorange) mlc(dkorange)) ///
  (rcap ub lb t if j==5, lwidth(thick) lc(forest_green %100)) ///
  (scatter b t if j==5, ms(S) msize(large) mfc(forest_green) mlc(forest_green)), ///
  ytitle("Magnification elasticities", size(large)) xtitle("Event time (in years)",size(large) margin(small)) xsize(8)  ///
  ylab(, labsize(large) nogrid) xlab(-2(.5)3, labsize(large)) graphregion(color(white)) yline(0, lcolor(black)) ///
  legend(pos(5) ring(0) order(2 "non-college" 4 "female" 6 "young" 8 "white") )
  graph export "../Figures/fig_sample.pdf", as(pdf) replace

* Restrict to male and non-white
replace t=t+1/12 if j==6 
twoway (rcap ub lb t if j==3, lwidth(thick) lc(dknavy %100)) ///
  (scatter b t if j==3, ms(O) msize(large) mfc(dknavy) mlc(dknavy)) ///
  (rcap ub lb t if j==6, lwidth(thick) lc(maroon %100)) ///
  (scatter b t if j==6, ms(T) msize(large) mfc(maroon) mlc(maroon)), ///
  ytitle("Magnification elasticities", size(large)) xtitle("Event time (in years)",size(large) margin(small)) xsize(8)  ///
  ylab(, labsize(large) nogrid) xlab(-2(.5)3, labsize(large)) graphregion(color(white)) yline(0, lcolor(black)) ///
  legend(pos(5) ring(0) order(2 "male" 4 "non-white") )
  graph export "../Figures/fig_sample_male_nonwhite.pdf", as(pdf) replace

* Restrict to the two groups not working at mw: college and older
replace t=t+1/12 if j==8
twoway  (rcap ub lb t if j==7, lwidth(thick) lc(dknavy %100)) ///
  (scatter b t if j==7, ms(O) msize(large) mfc(dknavy) mlc(dknavy)) ///
  (rcap ub lb t if j==8, lwidth(thick) lc(maroon %100)) ///
  (scatter b t if j==8, ms(T) msize(large) mfc(maroon) mlc(maroon)), ///
  ytitle("Magnification elasticities", size(large)) xtitle("Event time (in years)",size(large) margin(small)) xsize(8)  ///
  ylab(, labsize(large) nogrid) xlab(-2(.5)3, labsize(large)) graphregion(color(white)) yline(0, lcolor(black)) ///
  legend(pos(5) ring(0) order(2 "older" 4 "college" ) )
  graph export "../Figures/fig_sample_old_college.pdf", as(pdf) replace

* Compare balanced to unbalanced panel
keep if j==9
append using "../Figures/temp/cut_comparison.dta"
replace t=t+1/12 if j==9 // move unbalanced panel dates to zero (on impact)
replace j=0 if j==.
twoway  (rcap ub lb t if j==0, lwidth(thick) lc(dknavy %100)) ///
  (scatter b t if j==0, ms(O) msize(large) mfc(dknavy) mlc(dknavy)) ///
  (rcap ub lb t if j==9, lwidth(thick) lc(maroon %100)) ///
  (scatter b t if j==9, ms(T) msize(large) mfc(maroon) mlc(maroon)), ///
  ytitle("Magnification elasticities", size(large)) xtitle("Event time (in years)",size(large) margin(small)) xsize(8)  ///
  ylab(, labsize(large) nogrid) xlab(-2(.5)3, labsize(large)) graphregion(color(white)) yline(0, lcolor(black)) ///
  legend(pos(5) ring(0) order(2 "Balanced Panel" 4 "Unbalanced Panel" ) )
  graph export "../Figures/fig_sample_unbalanced.pdf", as(pdf) replace
  
restore

****************************************
*** Figure for sample year inclusion
****************************************

preserve

forval j=1(1)2 {
  if `j'==1 {
    local samp "keep==1 & year<=2010" // exclude later years
  }
  if `j'==2 {
    local samp "keep==1 & year>=2000"  // exclude earlier years
  }

  ivreghdfe dw_s (d_Inter_* d_FInter_* = dz_Inter* dz_FInter* ) if `samp' [aw=Dweight], absorb(i.yb i.yb#c.bound) cluster(statefip)
  local maxlag1=$diff - 2
  gen bL`j'_0=_b[d_Inter_0]
  gen seL`j'_0=_se[d_Inter_0]
  forval z=0(1)`maxlag1' {
    local k=`z'+1
    lincom (d_Inter_`z'+d_Inter_`k')/2
    gen bL`j'_`k'=r(estimate)
    gen seL`j'_`k'=r(se)
  }
  forval z=1(1)4 {
    local k=`z'+1
    lincom (-d_FInter_`z'-d_FInter_`k')/2  // converting changes to levels for leads
    gen bF`j'_`z'=r(estimate)
    gen seF`j'_`z'=r(se)
  }
}
keep bL* bF* se*
duplicates drop
* reshaping to have all eight series in together
gen c=1
forval j=1(1)2 {
  gen bF`j'_0=.
  gen seF`j'_0=.
}
reshape long bL1_ bF1_ seL1_ seF1_  bL2_ bF2_ seL2_ seF2_ , i(c) j(t)

gen T=-t
save /tmp/tmp.dta, replace
keep t bL* seL*
append using /tmp/tmp.dta
drop if c==1 & bF1_==.
replace t=T if T~=.
forval j=1(1)2 {
  replace bL`j'_ = bF`j'_ if bF`j'_~=.
  replace seL`j'_ = seF`j'_ if seF`j'_~=.
}
drop c bF* seF* T
replace t=t/2
forval j=1(1)2 {
  rename bL`j'_ b`j'
  rename seL`j'_ se`j'
}
reshape long b se, i(t) j(j)
gen ub=b+1.645*se
gen lb=b-1.645*se

* Figure
replace t=t+1/12 if j==2
twoway  (rcap ub lb t if j==1, lwidth(thick) lc(dknavy %100)) ///
  (scatter b t if j==1, ms(O) msize(large) mfc(dknavy) mlc(dknavy)) ///
  (rcap ub lb t if j==2, lwidth(thick) lc(maroon %100)) ///
  (scatter b t if j==2, ms(T) msize(large) mfc(maroon) mlc(maroon)), ///
  ytitle("Magnification elasticities", size(large)) xtitle("Event time (in years)",size(large) margin(small)) xsize(8)  ///
  ylab(, labsize(large) nogrid) xlab(-2(.5)3, labsize(large)) graphregion(color(white)) yline(0, lcolor(black)) ///
  legend(pos(5) ring(0) order(2 "1979 {&minus} 2010" 4 "2000 {&minus} 2016") )
  graph export "../Figures/fig_years.pdf", as(pdf) replace

restore

****************************************
*** Figure for alternative estimating equations (specification checks)
****************************************

preserve

forval j=1(1)3 {

  if `j'==1 {
    local abs " absorb(i.yb i.yb#c.bound i.statefip#c.bound) "
  }
  if `j'==2 {
    local abs " absorb(i.yb i.yb#c.bound i.statefip#i.yb) "
  }
  if `j'==3 {
    local abs " absorb(i.yb i.yb#c.bound i.LBIN#i.yb) "
  }

  ivreghdfe dw_s (d_Inter_* d_FInter_* = dz_Inter* dz_FInter*) if keep==1 [aw=Dweight], `abs' cluster(statefip)
  local maxlag1=$diff - 2 // b/c I add one using `k' below
  gen bL`j'_0=_b[d_Inter_0]
  gen seL`j'_0=_se[d_Inter_0]
  forval z=0(1)`maxlag1' {
    local k=`z'+1
    lincom (d_Inter_`z'+d_Inter_`k')/2
    gen bL`j'_`k'=r(estimate)
    gen seL`j'_`k'=r(se)
  }
  forval z=1(1)4 {
    local k=`z'+1
    lincom (-d_FInter_`z'-d_FInter_`k')/2  // converting changes to levels for leads
    gen bF`j'_`z'=r(estimate)
    gen seF`j'_`z'=r(se)
  }
}
keep bL* bF* se*
duplicates drop
* reshaping to have all series in together
gen c=1
forval j=1(1)3 {
  gen bF`j'_0=.
  gen seF`j'_0=.
}
reshape long bL1_ bF1_ seL1_ seF1_  bL2_ bF2_ seL2_ seF2_  bL3_ bF3_ seL3_ seF3_ , i(c) j(t)

gen T=-t
save /tmp/tmp.dta, replace
keep t bL* seL*
append using /tmp/tmp.dta
drop if c==1 & bF1_==.
replace t=T if T~=.
forval j=1(1)3 {
  replace bL`j'_ = bF`j'_ if bF`j'_~=.
  replace seL`j'_ = seF`j'_ if seF`j'_~=.
}
drop c bF* seF* T
replace t=t/2
forval j=1(1)3 {
  rename bL`j'_ b`j'
  rename seL`j'_ se`j'
}
reshape long b se, i(t) j(j)
gen ub=b+1.645*se
gen lb=b-1.645*se

* Figure
replace t=t+1/12 if j==2
replace t=t+2/12 if j==3
twoway  (rcap ub lb t if j==1, lwidth(thick) lc(dknavy %100)) ///
  (scatter b t if j==1, ms(O) msize(large) mfc(dknavy) mlc(dknavy)) ///
  (rcap ub lb t if j==2, lwidth(thick) lc(maroon %100)) ///
  (scatter b t if j==2, ms(T) msize(large) mfc(maroon) mlc(maroon)) ///
  (rcap ub lb t if j==3, lwidth(thick) lc(dkorange %100)) ///
  (scatter b t if j==3, ms(D) msize(large) mfc(dkorange) mlc(dkorange)), ///
  ytitle("Magnification elasticities", size(large)) xtitle("Event time (in years)",size(large) margin(small)) xsize(8)  ///
  ylab(, labsize(large) nogrid) xlab(-2(.5)3, labsize(large)) graphregion(color(white)) yline(0, lcolor(black)) ///
  legend(pos(5) ring(0) order(2 "b{subscript:g,r} {it:x} state fixed effects" 4 "(r,t) fixed effects" 6 "(g,t) fixed effects") )
  graph export "../Figures/fig_spec.pdf", as(pdf) replace

restore

************************************************
* Figure at different cutoffs
************************************************

* Figure places cut=100 (j=0), then cut=105 (j=1), then cut=110 (j=2)

*** Put together all data and prepare for estimation

foreach cut in 100 110 {

  *** Put data together

  * Bringing in wage data by labor bin
  use Output/MORG_CMcomplete_bian.dta, clear

  * Bringing in state-bian real minimum wages
  merge m:1 statefip year bian using Output/state_yb_combo.dta // Constructed in ConstructingMW-bian.do
  assert (year<1979 | year>2016 | year==1994 | (year==1995 & bian==1) | statefip==11)  if _merge~=3
  drop if _merge~=3
  drop _merge

  * Bring in GDP deflator to create log(real mw)
  merge m:1 year bian using Output/gdpDeflatorFRED_bian.dta // Constructed in this do file above
  assert (year<1979 | year>2016 | year==1994 | (year==1995 & bian==1))  if _merge~=3
  drop if _merge~=3
  drop _merge
  replace mw=100*mw/gdp // create real minimum wage
  drop gdp
  replace mw=ln(mw)
  label variable mw "real state-yb mw (gdp deflator)"

  * Bring in the bite measures
  merge m:1 statefip year bian lbin using Output/MORG`cut'_shares_bian.dta // Constructed in this do file above
  assert (wage==. | lnwage==. | weight==0 | weight==.) if _merge~=3
  drop _merge

  * Create leave-out version of median: requires installing rangestat: ssc install rangestat
  egen group_sl=group(state lbin)
  egen low=min(yb)
  egen high=max(yb)
  rangestat (median) bound = share`cut', by(group) excludeself interval(yb, low, high)

  * Create time-invariant weight
  bys statefips lbin: egen Weight=mean(weight)
  gen count=(wage~=. & hours~=0 & inc~=0)
  bys lbin statefip: egen num=sum(count)
  sum num, det
  gen keep=(num==r(max)) // indicator if state x group present in all periods
  egen state_lbin=group(state lbin)
  drop count
  tsset state_lbin yb
  * yb does take into account the jump btw 1993q4 and 1995q4

  gen age=real(substr(lbin,1,1))
  gen male=real(substr(lbin,2,1))
  gen white=real(substr(lbin,3,1))
  gen edu=real(substr(lbin,4,1))
  egen LBIN=group(lbin)

  * winsorize lnwage separately by (state x lbin)
  gen lnwage_w=lnwage
  winsor2 lnwage_w, replace cuts(2 98) by(statefips lbin)

  *** Prepare for regression analysis

  tsset state_lbin yb

  local maxlag=${maxlag}
  local maxlead=${maxlead}
  local maxlag1=`maxlag'-1
  gen dw_s = lnwage_w-L${diff}.lnwage_w
  gen Dweight=(weight*L${diff}.weight)/(weight+L${diff}.weight)  // weight is the weighted number of workers

  forval Lag=0(1)`maxlag1' {
    local Lag1=`Lag'+1
    gen dmw_`Lag'=L`Lag'.mw-L`Lag1'.mw
	gen dz_Inter_`Lag'=dmw_`Lag'*L`Lag1'.bound
	gen d_Inter_`Lag'=dmw_`Lag'*L`Lag1'.share`cut'
  }
  forval Lead=1(1)$maxlead {
    local Lead1=`Lead'-1
    gen dFmw_`Lead'=F`Lead'.mw-F`Lead1'.mw
	gen dz_FInter_`Lead'=dFmw_`Lead'*F`Lead1'.bound
	gen d_FInter_`Lead'=dFmw_`Lead'*F`Lead1'.share`cut'
  }

  ivreghdfe dw_s (d_Inter_* d_FInter_* = dz_Inter* dz_FInter* ) if keep==1 [aw=Dweight], absorb(i.yb i.yb#c.bound) cluster(statefip)
  local maxlag1=$diff - 2

  gen bL_0=_b[d_Inter_0]
  gen seL_0=_se[d_Inter_0]
  forval z=0(1)`maxlag1' {
    local k=`z'+1
    lincom (d_Inter_`z'+d_Inter_`k')/2
    gen bL_`k'=r(estimate)
    gen seL_`k'=r(se)
  }
  forval z=1(1)4 {
    local k=`z'+1
    lincom (-d_FInter_`z'-d_FInter_`k')/2  // converting changes to levels for leads
    gen bF_`z'=r(estimate)
    gen seF_`z'=r(se)
  }
  keep bL* bF* se*
  duplicates drop

  * reshaping
  gen c=1
  reshape long bL_ bF_ seL_ seF_  , i(c) j(t)
  * put lag and lead together
  gen T=-t
  save /tmp/tmp.dta, replace
  keep t bL seL
  append using /tmp/tmp.dta
  drop if c==1 & bF==.
  replace t=T if T~=.
  replace bL = bF if bF~=.
  replace seL = seF if seF~=.
  drop c bF seF T
  rename se* se
  rename b* b
  gen ub=b+1.645*se
  gen lb=b-1.645*se

  replace t=t/2
  
  * Use j to order and track the three sets of results w/ different cutoffs
  
  if `cut'==100 {
    gen j=0
    append using "../Figures/temp/cut_comparison.dta"
	replace j=1 if j==.
	save "../Figures/temp/cut_comparison.dta", replace
  }
  if `cut'==110 {
    gen j=2
    append using "../Figures/temp/cut_comparison.dta"
	save "../Figures/temp/cut_comparison.dta", replace
  }
}

**** Create the figure with all three sets of results together
replace t=t+j/9
twoway  (rcap ub lb t if j==0, lwidth(thick) lc(dknavy %100)) ///
  (scatter b t if j==0, ms(O) msize(large) mfc(dknavy) mlc(dknavy)) ///
  (rcap ub lb t if j==1, lwidth(thick) lc(maroon %100)) ///
  (scatter b t if j==1, ms(T) msize(large) mfc(maroon) mlc(maroon)) ///
  (rcap ub lb t if j==2, lwidth(thick) lc(dkorange %100)) ///
  (scatter b t if j==2, ms(D) msize(large) mfc(dkorange) mlc(dkorange)), ///
  ytitle("Magnification elasticities", size(large)) xtitle("Event time (in years)",size(large) margin(small)) xsize(8)  ///
  ylab(, labsize(large) nogrid) xlab(-2(.5)3, labsize(large)) graphregion(color(white)) yline(0, lcolor(black)) ///
  legend(pos(5) ring(0) order(2 "Cutoff of 1.00" 4 "Cutoff of 1.05" 6 "Cutoff of 1.10") )
  graph export "../Figures/fig_cutoffs.pdf", as(pdf) replace


************************************************
************************************************
* Results at shorter and longer time differences
************************************************
************************************************

foreach d in 6 8 {

  global diff=`d' // periods of difference in the DV [6 - 8, using 7 with 6 and 8 as robustness]
  global maxlag=${diff}+6  // number of lagged shocks (including current) [${diff} +6]
  global maxlead=${diff} // number of lead shocks [${diff} report first 4]

  *** Put data together

  * Bringing in wage data by labor bin
  use Output/MORG_CMcomplete_bian.dta, clear // Constructed in this do file above

  * Bringing in state-bian real minimum wages
  merge m:1 statefip year bian using Output/state_yb_combo.dta // Constructed in ConstructingMW-bian.do
  assert (year<1979 | year>2016 | year==1994 | (year==1995 & bian==1) | statefip==11)  if _merge~=3
  drop if _merge~=3
  drop _merge

  * Bring in GDP deflator to create log(real mw)
  merge m:1 year bian using Output/gdpDeflatorFRED_bian.dta // Constructed in this do file above
  assert (year<1979 | year>2016 | year==1994 | (year==1995 & bian==1))  if _merge~=3
  drop if _merge~=3
  drop _merge
  replace mw=100*mw/gdp // create real minimum wage
  drop gdp
  replace mw=ln(mw)
  label variable mw "real state-yb mw (gdp deflator)"

  * Bring in the bite measures
  merge m:1 statefip year bian lbin using Output/MORG105_shares_bian.dta // Constructed in this do file above
  assert (wage==. | lnwage==. | weight==0 | weight==.) if _merge~=3
  drop _merge

  * Create leave-out version of median: requires installing rangestat: ssc install rangestat
  egen group_sl=group(state lbin)
  egen low=min(yb)
  egen high=max(yb)
  rangestat (median) bound = share105, by(group) excludeself interval(yb, low, high)

  * Create time-invariant weight
  bys statefips lbin: egen Weight=mean(weight)
  gen count=(wage~=. & hours~=0 & inc~=0)
  bys lbin statefip: egen num=sum(count)
  sum num, det
  gen keep=(num==r(max)) // indicator if state x group present in all periods
  egen state_lbin=group(state lbin)
  drop count
  tsset state_lbin yb
  * yb does take into account the jump btw 1993q4 and 1995q4

  gen age=real(substr(lbin,1,1))
  gen male=real(substr(lbin,2,1))
  gen white=real(substr(lbin,3,1))
  gen edu=real(substr(lbin,4,1))
  egen LBIN=group(lbin)

  * winsorize lnwage separately by (state x lbin)
  gen lnwage_w=lnwage
  winsor2 lnwage_w, replace cuts(2 98) by(statefips lbin)

  *** Prepare for regression analysis

  tsset state_lbin yb

  local maxlag=${maxlag}
  local maxlead=${maxlead}
  local maxlag1=`maxlag'-1
  gen dw_s = lnwage_w-L${diff}.lnwage_w
  gen Dweight=(weight*L${diff}.weight)/(weight+L${diff}.weight)  // weight is the weighted number of workers

  forval Lag=0(1)`maxlag1' {
    local Lag1=`Lag'+1
    gen dmw_`Lag'=L`Lag'.mw-L`Lag1'.mw
	gen dz_Inter_`Lag'=dmw_`Lag'*L`Lag1'.bound
	gen d_Inter_`Lag'=dmw_`Lag'*L`Lag1'.share105
  }
  forval Lead=1(1)$maxlead {
    local Lead1=`Lead'-1
    gen dFmw_`Lead'=F`Lead'.mw-F`Lead1'.mw
	gen dz_FInter_`Lead'=dFmw_`Lead'*F`Lead1'.bound
	gen d_FInter_`Lead'=dFmw_`Lead'*F`Lead1'.share105
  }

  ivreghdfe dw_s (d_Inter_* d_FInter_* = dz_Inter* dz_FInter* ) if keep==1 [aw=Dweight], absorb(i.yb i.yb#c.bound) cluster(statefip)
  local maxlag1=$diff - 2

  gen bL_0=_b[d_Inter_0]
  gen seL_0=_se[d_Inter_0]
  forval z=0(1)`maxlag1' {
    local k=`z'+1
    lincom (d_Inter_`z'+d_Inter_`k')/2
    gen bL_`k'=r(estimate)
    gen seL_`k'=r(se)
  }
  forval z=1(1)4 {
    local k=`z'+1
    lincom (-d_FInter_`z'-d_FInter_`k')/2  // converting changes to levels for leads
    gen bF_`z'=r(estimate)
    gen seF_`z'=r(se)
  }
  keep bL* bF* se*
  duplicates drop

  * reshaping
  gen c=1
  reshape long bL_ bF_ seL_ seF_  , i(c) j(t)
  * put lag and lead together
  gen T=-t
  save /tmp/tmp.dta, replace
  keep t bL seL
  append using /tmp/tmp.dta
  drop if c==1 & bF==.
  replace t=T if T~=.
  replace bL = bF if bF~=.
  replace seL = seF if seF~=.
  drop c bF seF T
  rename se* se
  rename b* b
  gen ub=b+1.645*se
  gen lb=b-1.645*se

  replace t=t/2
  
  local x=${diff}/2-.5
  twoway (rcap ub lb t, lwidth(thick) lc(dknavy %100)) ///
    (scatter b t, ms(O) msize(vlarge) mfc(red) mlc(dknavy)), ///
    ytitle("Magnification elasticities", size(large)) xtitle("Event time (in years)",size(large) margin(small)) xsize(8)  ///
    ylab(, labsize(large) nogrid) xlab(-2(.5)`x', labsize(large)) graphregion(color(white)) yline(0, lcolor(black)) ///
    legend(off) 
    graph export "../Figures/fig_diff${diff}.pdf", as(pdf) replace

}

* re-set diff (and corresponding variables) back to baseline levels (in case use)
global diff=${diff_fixed}
global maxlag=${diff}+6  
global maxlead=${diff}

  
************************************************
************************************************
* Summary statistics 
************************************************
************************************************

************************************************
* Distribution of half-year changes in real minmum wages
************************************************

use Output/state_yb_combo.dta, clear // Constructed in ConstructingMW-bian.do
merge m:1 year bian using Output/gdpDeflatorFRED_bian.dta // Constructed in this do file above
assert (year<1979 | year>2016)  if _merge~=3
drop if _merge~=3 | year<1979 // I will use minimum wage changes over the sample period (not earlier), but also including 1994 and all of 1995
drop _merge
replace mw=100*mw/gdp // create real minimum wage
drop gdp
replace mw=log(mw)

* create the yb variable (note that I include 1994 and first half of 1995)
sort year bian
egen yb=group(year bian)
assert yb==(year-1979)*2+bian

* tsset the data for changes
tsset state yb

* construct the state-and-time changes
gen dmw=mw-L1.mw

label variable dmw "Half-year log change in the real minimum wage"
histogram dmw, density width(0.01) graphregion(color(white)) xlab(, labsize(vlarge)) ylab(,  nogrid) color(gray) ///
  xsize(8) ytitle(, size(huge)) xtitle(, size(huge))
  graph export "../Figures/histogram_dmw.pdf", as(pdf) replace

************************************************
* Distribution of minimum wage bites in balanced sample
* Distribution of 3.5-year changes in real wages
************************************************

**** Bring in all the data in order to have weight and variable keep defined

* Bringing in wage data by labor bin
use Output/MORG_CMcomplete_bian.dta, clear // Constructed in this do file above

* Bringing in state-bian real minimum wages
merge m:1 statefip year bian using Output/state_yb_combo.dta // Constructed in ConstructingMW-bian.do
assert (year<1979 | year>2016 | year==1994 | (year==1995 & bian==1) | statefip==11)  if _merge~=3
drop if _merge~=3
drop _merge

merge m:1 year bian using Output/gdpDeflatorFRED_bian.dta // Constructed in this do file above
assert (year<1979 | year>2016 | year==1994 | (year==1995 & bian==1))  if _merge~=3
drop if _merge~=3
drop _merge

* Bring in the bite measures
merge m:1 statefip year bian lbin using Output/MORG105_shares_bian.dta // Constructed in this do file above
assert (wage==. | lnwage==. | weight==0 | weight==.) if _merge~=3
drop _merge

* Create indicator for balanced sample
gen count=(wage~=. & hours~=0 & inc~=0)
bys lbin statefip: egen num=sum(count)
sum num, det
gen keep=(num==r(max)) // indicator if state x group present in all periods

* Create indicators for education characteristics
gen edu=real(substr(lbin,4,1))

* winsorize lnwage separately by (state x lbin)
gen lnwage_w=lnwage
winsor2 lnwage_w, replace cuts(2 98) by(statefips lbin)

* Keep data on balanced sample only
keep if keep==1

*** Manually input these 8 numbers into the file 
sum share [aw=weight]
sum share [aw=weight] if col==1
sum share [aw=weight] if col==0 
sum share [aw=weight] if edu==1

*** Create the minimum wage bind histogram
label variable share105 "Share of g,r,t wage income {&le} 1.05 {it:x} minimum wage"
histogram share, density width(0.01) graphregion(color(white)) xlab(, labsize(vlarge)) ylab(, labsize(vlarge) nogrid) color(gray) ///
  xsize(8) ytitle(, size(huge)) xtitle(, size(huge))
  graph export "../Figures/histogram_bind.pdf", as(pdf) replace

* tsset the data for changes
egen state_lbin=group(statefips lbin)
tsset state_lbin yb
gen dw=lnwage_w-L7.lnwage_w
label variable dw "3.5-year log change in the real wage"

*** Create the 3.5-year real wage change histogram
histogram dw, density width(0.01) graphregion(color(white)) xlab(, labsize(vlarge)) ylab(,  nogrid) color(gray) ///
  xsize(8) ytitle(, size(huge)) xtitle(, size(huge))
  graph export "../Figures/histogram_dw.pdf", as(pdf) replace
sum dw, det
